import pandas as pd
import matplotlib.pyplot as plt

# Use the SimHei font so the Chinese labels render, and disable the Unicode
# minus glyph so negative signs display correctly with that font.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Load the job data. header=None makes pandas treat the first line as data,
# so the column names are supplied explicitly.
column_names = [
    "id", "job_type", "job_title", "annual_salary",
    "company_type", "company_size", "city",
]
df = pd.read_csv(
    'C:\\Users\\LENOVO\\Desktop\\数据可视化\\job_clean.csv',
    sep=',',
    header=None,
    names=column_names,
)

# Drop rows whose city is the remote-work marker, then keep the 30 cities
# that appear most often in the remaining rows.
is_not_remote = df['city'].ne('远程办公')
df_city_filtered = df[is_not_remote]
city_counts = df_city_filtered['city'].value_counts()
top_30_cities = city_counts.head(30).index

# Mean annual salary per retained city, sorted ascending so the highest
# value ends up as the top bar of the horizontal chart.
in_top_30 = df_city_filtered['city'].isin(top_30_cities)
salary_by_city = df_city_filtered[in_top_30].groupby('city')['annual_salary']
city_salary_top30 = salary_by_city.mean().sort_values()

# Draw the horizontal bar chart.
fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(city_salary_top30.index, city_salary_top30.values, color='skyblue')
ax.set_xlabel("平均年薪")
ax.set_title("不同城市的平均年薪（Top 30，不含远程办公）")
fig.tight_layout()
plt.show()
